In [1]:
from IPython.display import Image
Image(url='http://1.bp.blogspot.com/-ME24ePzpzIM/UQLWTwurfXI/AAAAAAAAANw/W3EETIroA80/s1600/drop_shadows_background.png',
      width=1000, height=1000)


Out[1]:

Classification

SVM


In [1]:
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap



We create our dataset and its corresponding labels for training


In [2]:
# 100 points per class: class 1 is drawn from [0, 10)^2, class 2 from [10, 20)^2
xs = np.array([[np.random.randint(i-10, i), np.random.randint(i-10, i)] for i in [10, 20] for _ in range(100)])
y = [i for i in [1, 2] for _ in range(100)]
print(xs.shape, y)


(200, 2) [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]

We train ...


In [3]:
sv = SVC(kernel='linear')  # linear kernel; try kernel='rbf' too

In [4]:
sv.fit(xs, y)


Out[4]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

And we query the predicted class of a new sample ..


In [5]:
sv.predict([[17, 5]])  # predict expects a 2-D array: one row per sample


Out[5]:
array([2])

We obtain the decision function to visualize the resulting classification


In [6]:
XX_svm, YY_svm = np.mgrid[0:20:1000j, 0:20:1000j]   # 1000x1000 evaluation grid over [0, 20]^2
Z_svm = sv.decision_function(np.c_[XX_svm.ravel(), YY_svm.ravel()])
Z_svm = Z_svm.reshape(XX_svm.shape)
print(Z_svm)


[[ -9.5         -9.48998999  -9.47997998 ...,   0.47997998   0.48998999
    0.5       ]
 [ -9.48998999  -9.47997998  -9.46996997 ...,   0.48998999   0.5
    0.51001001]
 [ -9.47997998  -9.46996997  -9.45995996 ...,   0.5          0.51001001
    0.52002002]
 ..., 
 [  0.47997998   0.48998999   0.5        ...,  10.45995996  10.46996997
   10.47997998]
 [  0.48998999   0.5          0.51001001 ...,  10.46996997  10.47997998
   10.48998999]
 [  0.5          0.51001001   0.52002002 ...,  10.47997998  10.48998999
   10.5       ]]

In [7]:
plt.plot(xs[:100, 0], xs[:100, 1], 'ro')   # class 1
plt.plot(xs[100:, 0], xs[100:, 1], 'go')   # class 2

# solid line: decision boundary; dashed lines: contours at decision values -0.5 and 0.5
plt.contour(XX_svm, YY_svm, Z_svm, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'], levels=[-.5, 0, .5])
plt.pcolormesh(XX_svm, YY_svm, Z_svm > 0, cmap=plt.cm.Paired)
plt.xlim(0, 20)
plt.ylim(0, 20)


Out[7]:
(0, 20)
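The solid contour is the decision boundary, which is determined entirely by the support vectors, the training points closest to it. A quick sketch to highlight them on the current plot, using the support_vectors_ attribute that SVC exposes after fitting:

print(sv.support_vectors_.shape)   # (n_support_vectors, 2)
plt.plot(sv.support_vectors_[:, 0], sv.support_vectors_[:, 1],
         'ko', markerfacecolor='none', markersize=12)   # circle the support vectors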

Now change the SVM kernel type and plot again. What changed?
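As a starting point, a minimal sketch that refits with the RBF kernel and redraws the decision function on the same grid; with this kernel the boundary is no longer a straight line:

sv_rbf = SVC(kernel='rbf')
sv_rbf.fit(xs, y)
Z_rbf = sv_rbf.decision_function(np.c_[XX_svm.ravel(), YY_svm.ravel()]).reshape(XX_svm.shape)
plt.plot(xs[:100, 0], xs[:100, 1], 'ro')
plt.plot(xs[100:, 0], xs[100:, 1], 'go')
plt.contour(XX_svm, YY_svm, Z_rbf, colors='k', linestyles=['--', '-', '--'], levels=[-.5, 0, .5])
plt.pcolormesh(XX_svm, YY_svm, Z_rbf > 0, cmap=plt.cm.Paired)
plt.xlim(0, 20)
plt.ylim(0, 20)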

K-neighbors


In [8]:
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid

In [9]:
kn = KNeighborsClassifier(n_neighbors=6, weights='uniform')  # each of the 6 nearest neighbors gets an equal vote

In [10]:
kn.fit(xs, y)


Out[10]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           n_neighbors=6, p=2, weights='uniform')
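With p=2 the Minkowski metric is plain Euclidean distance. As a side sketch, kneighbors (part of scikit-learn's nearest-neighbors API) shows which training points would vote on a new sample:

dist, idx = kn.kneighbors([[17, 5]], n_neighbors=6)
print(dist)                       # distances to the 6 nearest training points
print([y[i] for i in idx[0]])     # their labels; the majority decides the prediction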

In [11]:
XX, YY = np.mgrid[0:20:1000j, 0:20:1000j]
Z = kn.predict(np.c_[XX.ravel(), YY.ravel()])
Z = Z.reshape(XX.shape)

In [12]:
plt.plot(xs[:100, 0], xs[:100, 1], 'ro')   # class 1
plt.plot(xs[100:, 0], xs[100:, 1], 'go')   # class 2
plt.pcolormesh(XX, YY, Z, cmap=plt.cm.Paired)   # predicted class at every grid point
plt.xlim(0, 20)
plt.ylim(0, 20)


Out[12]:
(0, 20)
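Before moving on, a rough sketch of how n_neighbors and the weights scheme change the fit; training accuracy is only a crude proxy, so treat the numbers as illustrative:

for k in [1, 6, 25]:                      # small k overfits, large k over-smooths
    kn_k = KNeighborsClassifier(n_neighbors=k, weights='distance')
    kn_k.fit(xs, y)
    print(k, kn_k.score(xs, y))           # mean accuracy on the training set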

Clustering

K-Means


In [13]:
from sklearn.cluster import KMeans, MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs

In [14]:
km = KMeans(n_clusters=10, n_jobs=2)  # How many clusters do we want? Try changing n_clusters

In [15]:
km.fit(xs)  # KMeans is unsupervised; the labels y are not used


Out[15]:
KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=10, n_init=10,
    n_jobs=2, precompute_distances=True, random_state=None, tol=0.0001,
    verbose=0)

In [16]:
XX_km, YY_km = np.mgrid[0:20:1000j, 0:20:1000j]
Z_km = km.predict(np.c_[XX_km.ravel(), YY_km.ravel()])   # assign every grid point to its nearest centroid
Z_km = Z_km.reshape(XX_km.shape)
print(Z_km)


[[5 5 5 ..., 3 3 3]
 [5 5 5 ..., 3 3 3]
 [5 5 5 ..., 3 3 3]
 ..., 
 [0 0 0 ..., 6 6 6]
 [0 0 0 ..., 6 6 6]
 [0 0 0 ..., 6 6 6]]

In [17]:
plt.plot(xs[:100, 0],xs[:100, 1],'ro')
plt.plot(xs[100:, 0],xs[100:, 1],'go')
plt.pcolormesh(XX_km, YY_km, Z_km,  cmap=plt.cm.Paired)
plt.xlim(0, 20)
plt.ylim(0, 20)


Out[17]:
(0, 20)
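KMeans needs n_clusters fixed in advance. A common heuristic for choosing it is to compare the within-cluster sum of squares, exposed as inertia_ after fitting, across several values of k; a minimal sketch:

for k in range(1, 11):
    km_k = KMeans(n_clusters=k)
    km_k.fit(xs)
    print(k, km_k.inertia_)   # drops sharply until k matches the real structure (two blobs here)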

MeanShift


In [18]:
xs_circle, _ = make_blobs(n_samples=1000, centers=[[3,3], [2,2], [1,1]], cluster_std=0.4)  # three overlapping Gaussian blobs

In [19]:
bandwidth = estimate_bandwidth(xs_circle, quantile=0.2, n_samples=1000)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)

In [20]:
ms.fit(xs_circle)


Out[20]:
MeanShift(bandwidth=0.70798162587847879, bin_seeding=True, cluster_all=True,
     min_bin_freq=1, seeds=None)

In [21]:
cluster_centers = ms.cluster_centers_   # one center per discovered cluster
labels = ms.labels_
print(labels.shape)
print(cluster_centers)


(1000,)
[[ 2.1063621   2.08495504]
 [ 0.99266406  0.9702425 ]
 [ 3.01717131  2.95103879]]

In [22]:
colors = ['ro', 'go', 'yo']
for point, label in zip(xs_circle, labels):          # color each sample by its cluster
    plt.plot(point[0], point[1], colors[label])
for cx, cy in cluster_centers:                       # mark the discovered centers
    plt.plot(cx, cy, 'bo', markeredgecolor='k', markersize=14)
plt.show()
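Unlike KMeans, MeanShift infers the number of clusters from the bandwidth, so the quantile passed to estimate_bandwidth is the knob that matters; a sketch of its effect (the values are illustrative):

for q in [0.1, 0.2, 0.3]:
    bw = estimate_bandwidth(xs_circle, quantile=q, n_samples=1000)
    ms_q = MeanShift(bandwidth=bw, bin_seeding=True).fit(xs_circle)
    print(q, bw, len(np.unique(ms_q.labels_)))   # smaller quantile -> smaller bandwidth -> more clusters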